GlowwormScaleComp = function(Input, MetaData = Input@meta.data, ClassName, SubclassName = NA, Genes, GlowwormGenesOutput = NA, PercentCutoff = 0.10, Linearity = "Auto", Verbose = T, DataSlot, CellNorm, SecondaryGenes = 100, PopScale =T){
if(any(Genes == names(GlowwormGenesOutput))){GeneList = GlowwormGenesOutput[[Genes]]}else if(is.na(GlowwormGenesOutput)){GeneList = Genes}else{stop("If using GlowwormGenes to determine gene inputs the GlowwormGenesOutput parameter should be set to the output of GlowwormGenes, and the Genes parameter should indicate 'UniqueGenesinWindow' or 'NearestNeighborGene'")
}
if(length(unique(GeneList))== 0){stop("No genes in input - check results of GlowwormGenesOutput")}
MetaData <- dplyr::rename(MetaData, Class = all_of(ClassName))
if(is.na(SubclassName)){
MetaDataComb = MetaData %>% dplyr::select(Class)
}else{
MetaData <- dplyr::rename(MetaData, Subclass = all_of(SubclassName))
MetaDataComb = MetaData %>% dplyr::select(Class, Subclass)
}
if(class(Input) =="Seurat"){
PullMeta = Input@meta.data %>% dplyr::select(nFeature_RNA, nCount_RNA)
MetaDataComb = merge(MetaDataComb, PullMeta, by =0)
row.names(MetaDataComb) = MetaDataComb$Row.names
MetaDataComb$Row.names = NULL
}
OutputList = list()
OutputList[["MetaData"]] = MetaDataComb
if(class(Input) =="Seurat"){
GetGenes = subset(GeneList, GeneList %in% row.names(Input@assays$RNA@counts))
GenesNotIn = subset(GeneList, ! GeneList %in% row.names(Input@assays$RNA@counts))
if(class(SecondaryGenes) == "character"){
SecondaryGenes2 = subset(SecondaryGenes, SecondaryGenes %in% row.names(Input@assays$RNA@counts))
SecGenes = subset(row.names(Input@assays$RNA@counts), row.names(Input@assays$RNA@counts) %in% SecondaryGenes2 & ! row.names(Input@assays$RNA@counts) %in% GetGenes)
BackgroundType = "selected"
}else if(class(SecondaryGenes) %in% c("numeric", "integer") & SecondaryGenes > 0){
MinClustSize = as.data.frame(table(MetaData$Subclass))
MinClustSize = MinClustSize[order(MinClustSize$Freq), ]
ExpByGene = as.data.frame(rowSums(Input@assays$RNA@counts != 0))
colnames(ExpByGene) = "Ncells"
ExpGenes = subset(ExpByGene, ExpByGene$Ncells > MinClustSize[[2]][[1]])
RestGenes = subset(row.names(ExpGenes), ! row.names(ExpGenes) %in% GeneList)
NSecondary = SecondaryGenes
set.seed(123)
SecGenes = sort(sample(RestGenes, NSecondary, replace=F))
BackgroundType = "randomly generated"
}else if(length(SecondaryGenes) == 0){stop("SecondaryGenes should contain either the number of randomly selected background genes to use genes, or a string of genes to use as secondary genes. At least an equal number is recommended, but default is set to 100")}
DefaultAssay(Input) = "RNA"
Subset_Data = FetchData(Input, vars = c(GetGenes, SecGenes), cells = row.names(MetaData), slot = DataSlot)
Subset_Data2 = merge(Subset_Data, PullMeta, by = 0)
row.names(Subset_Data2) = Subset_Data2$Row.names
Subset_Data2$Row.names = NULL
#row.names(Subset_Data2) = Subset_Data2$Row.names
#Subset_Data2$Row.names = NULL
#SeuFile_Reduced = subset(Input, cells = row.names(MetaDataComb))
#GetGenes = subset(GeneList, GeneList %in% row.names(SeuFile_Reduced@assays$RNA@counts))
#Subset_Data = SeuFile_Reduced@assays$RNA@counts[GetGenes, ] #Genes as row names
}else if("dgCMatrix" == class(Input)){
Input = as.matrix(Input)
GetGenes = subset(GeneList, GeneList %in% row.names(Input))
GetBarcs = subset(MetaData, row.names(MetaDataComb) %in% colnames(Input))
GetBarcs = row.names(GetBarcs)
Subset_Data = Input[GetGenes, GetBarcs] #Genes as row names
}else if("data.frame" == class(Input)){
Input = as.matrix(Input)
GetGenes = subset(GeneList, GeneList %in% row.names(Input))
GetBarcs = subset(MetaData, row.names(MetaDataComb) %in% colnames(Input))
GetBarcs = row.names(GetBarcs)
Subset_Data = Input[GetGenes, GetBarcs] #Genes as row names
}else if("matrix" == class(Input)[1]){
GetGenes = subset(GeneList, GeneList %in% row.names(Input))
GetBarcs = subset(MetaData, row.names(MetaDataComb) %in% colnames(Input))
GetBarcs = row.names(GetBarcs)
Subset_Data = Input[GetGenes, GetBarcs] #Genes as row names
}else{stop("Input file needs to be a Seurat File, or a matrix, sparse matrix or data frame with genes as row names and barcodes as column names")}
Settings = list(#"InputFile" = as.character(substitute(Input)),
"SecondaryGenes" = SecGenes,
"N_Classes" = length(unique(MetaDataComb$Class)),
"N_Subclasses" = length(unique(MetaDataComb$Subclass)))
if(CellNorm == "Count"){
Subset_Data2$nFeature_RNA = NULL
Subset_Data2 = Subset_Data2/Subset_Data2$nCount_RNA
Subset_Data2$nCount_RNA = NULL
# Subset_Data = as.data.frame(t(Subset_Data2))
}else if(CellNorm == "Feat"){
Subset_Data2$nCount_RNA = NULL
Subset_Data2 = Subset_Data2/Subset_Data2$nFeature_RNA
Subset_Data2$nFeature_RNA = NULL
Subset_Data = as.data.frame(t(Subset_Data2))
}else if(CellNorm == "Ratio"){
Subset_Data2$Ratio = Subset_Data2$nCount_RNA/Subset_Data2$nFeature_RNA
Subset_Data2$nCount_RNA = NULL; Subset_Data2$nFeature_RNA = NULL
Subset_Data2 = Subset_Data2/Subset_Data2$Ratio
Subset_Data2$Ratio = NULL
Subset_Data = as.data.frame(t(Subset_Data2))
}else if(CellNorm == "None"){
Subset_Data2$nCount_RNA = NULL; Subset_Data2$nFeature_RNA = NULL
Subset_Data = as.data.frame(t(Subset_Data2))
}
if(Verbose == T){cat("| ## | Preprocessing complete")}
#row.names(Subset_Data2) = Subset_Data2$Row.names; Subset_Data2$Row.names = NULL
DataMatrix_Zscore = as.data.frame(sapply(Subset_Data2, function(Subset_Data2) (Subset_Data2-mean(Subset_Data2))/sd(Subset_Data2)))
row.names(DataMatrix_Zscore) = row.names(Subset_Data2)
DataMatrix_Zscore[is.na(DataMatrix_Zscore)] = 0
DataMatrix_Zscore_t = as.data.frame(t(DataMatrix_Zscore))
if(Verbose == T){cat("\n| #### | Scaling complete")}
if(is.na(SubclassName) == F){
MetaDataComb$Combined = paste(MetaDataComb[["Subclass"]], MetaDataComb[["Class"]], sep="^")
MetaDataComb[["Subclass"]] = NULL
MetaDataComb[["Class"]] = NULL
}else{
MetaDataComb$Combined = paste(MetaDataComb[["Class"]], MetaDataComb[["Class"]], sep="^")
MetaDataComb[["Class"]] = NULL
}
Settings[["N_TotalClusters"]] = length(unique(MetaDataComb$Combined))
Settings[["PercentCutoff"]] = PercentCutoff
DataMatrix_Zscore_wAssigns = merge(DataMatrix_Zscore, MetaDataComb, by = 0)
DataMatrix_Zscore_wAssigns$nFeature_RNA = NULL
DataMatrix_Zscore_wAssigns$nCount_RNA = NULL
row.names(DataMatrix_Zscore_wAssigns) = DataMatrix_Zscore_wAssigns$Row.names
DataMatrix_Zscore_wAssigns$Row.names = NULL
DataMatrix_Zscore_wAssigns[DataMatrix_Zscore_wAssigns <= 0] <- 0
if(Verbose == T){cat("\n| ###### | Metadata filtering complete")}
ScreenedData = as.data.frame(matrix(ncol = length(GetGenes)+1, nrow = 0))
colnames(ScreenedData) = c(GetGenes, "Combined")
ForLong = DataMatrix_Zscore_wAssigns
ForLong$Combined = NULL
DataLong = melt(ForLong)
DataLong$Combined = DataMatrix_Zscore_wAssigns$Combined
DataLong$Barcs = row.names(ForLong)
DataLong$Count = 1
DataLong$Binary = ifelse(DataLong$value > 0, 1, 0)
DataLong$GenePop = paste(DataLong$variable, DataLong$Combined)
GetCutoffs = DataLong %>% group_by(variable, Combined) %>% dplyr::summarise("PC_exp" = sum(Binary)/sum(Count))
PassedCutoff = subset(GetCutoffs, GetCutoffs$PC_exp >= PercentCutoff)
PassedCutoff$GenePop = paste(PassedCutoff$variable, PassedCutoff$Combined)
DataLong$Data = ifelse(! DataLong$GenePop %in% PassedCutoff$GenePop, 0, DataLong$value) ##Changed JUN14
DataLong$GenePop = NULL
DataLong$Binary = NULL
DataLong$Count = NULL
if(PopScale == T){
OutputList[["GlowwormScaleAllCells_Target"]] = subset(DataLong, DataLong$variable %in% GetGenes)
OutputList[["GlowwormScaleAllCells_Secondary"]] = subset(DataLong, DataLong$variable %in% SecGenes)
}else if(PopScale == F){
ZScoreLong = gather(DataMatrix_Zscore, "variable", "Data")
ZScoreLong$Barcs = row.names(DataMatrix_Zscore)
ZScoreLong = merge(ZScoreLong, MetaDataComb, by.x = "Barcs", by.y = 0)
ZScoreLong$nFeature_RNA = NULL; ZScoreLong$nCount_RNA = NULL
OutputList[["GlowwormScaleAllCells_Target"]] = subset(ZScoreLong, ZScoreLong$variable %in% GetGenes)
OutputList[["GlowwormScaleAllCells_Secondary"]] = subset(ZScoreLong, ZScoreLong$variable %in% SecGenes)
}
# aqm <- melt(DataLong, id=c("Combined", "variable" , "Data"), na.rm=TRUE)
# suppressMessages({
# ScreenedData <- dcast(aqm, Combined~variable,mean,id.vars = NULL) ##Error
# })
ScreenedData <- dcast(DataLong, Barcs~variable, value.var = "Data")
ScreenedData = merge(ScreenedData, MetaDataComb, by.x = "Barcs", by.y = 0)
row.names(ScreenedData) = ScreenedData$Barcs
ScreenedData$Barcs = NULL
ScreenedData$nFeature_RNA = NULL
ScreenedData$nCount_RNA = NULL
#OutputList[["GlowwormScaleAllCells_Target"]] = ScreenedData %>% dplyr::select(GetGenes, Combined)
#OutputList[["GlowwormScaleAllCells_Secondary"]] = ScreenedData %>% dplyr::select(SecondaryGenes, Combined)
if(Verbose == T){cat("\n| ######## | Background correction complete")}
DataMatrix_Zscore_Reduced = ScreenedData %>% group_by(Combined) %>% summarise_all(mean, na.rm = TRUE)
DataMatrix_Zscore_Reduced = as.data.frame(DataMatrix_Zscore_Reduced)
row.names(DataMatrix_Zscore_Reduced) = DataMatrix_Zscore_Reduced$Combined
DataMatrix_Zscore_Reduced$Combined = NULL
DataMatrix_Zscore_Reduced[DataMatrix_Zscore_Reduced < 0] <- 0
DataMatrix_Zscore_Reduced$Subclass = gsub("\\^.*", "", row.names(DataMatrix_Zscore_Reduced))
DataMatrix_Zscore_Reduced$Class = gsub(".*\\^", "", row.names(DataMatrix_Zscore_Reduced))
OutputList[["GlowwormScaleOutput"]] = DataMatrix_Zscore_Reduced
OutputList[["GlowwormScaleOutput_Target"]] = DataMatrix_Zscore_Reduced %>% dplyr::select(GetGenes)
OutputList[["GlowwormScaleOutput_Secondary"]] = DataMatrix_Zscore_Reduced %>% dplyr::select(SecGenes)
DataMatrix_Zscore_Reduced$Subclass = NULL
DataMatrix_Zscore_Reduced$Class = NULL
GlowwormScaleOutput_t = as.data.frame(t(DataMatrix_Zscore_Reduced))
SumStats = as.data.frame(rowSums(GlowwormScaleOutput_t))
colnames(SumStats) = "Total"
SumStats$StDev = rowSds(as.matrix(GlowwormScaleOutput_t))
##
Binary = DataMatrix_Zscore_Reduced
Binary = as.data.frame(ifelse(Binary > 0, 1, 0))
Binary$Class = gsub(".*\\^", "", row.names(Binary))
Pull = Binary %>% group_by(Class) %>% summarise_all(sum)
if(dim(Pull)[1] > 1){
Pull = as.data.frame(Pull)
row.names(Pull) = Pull$Class
Pull$Class = NULL
Pull2 = apply(Pull, 2, function(x){x/sum(x)})
Pull3 = as.data.frame(colMaxs(Pull2))
row.names(Pull3) = colnames(Pull)
colnames(Pull3) = "FreqPerc"
Pull3[is.na(Pull3)] = 0
SumStats = merge(SumStats, Pull3, by=0)
row.names(SumStats) = SumStats$Row.names
SumStats$Row.names = NULL
Pull2 = as.data.frame(Pull2)
Pull4 = as.data.frame(ifelse(Pull2 > 0, 1, 0))
Pull5 = as.data.frame(colSums(Pull4))
SumStats$SD = SumStats$FreqPerc + SumStats$StDev
SumStats2 = subset(SumStats, SumStats$Total > 0)
}else{
SumStats$SD = SumStats$StDev
SumStats2 = subset(SumStats, SumStats$Total > 0)
Pull4 = as.data.frame(Pull)
row.names(Pull4) = Pull4$Class
Pull4$Class = NULL
Pull5 = as.data.frame(t(ifelse(Pull4 > 0, 1, 0)))
}
#if(length(Linearity) == 2){
# Expression = Linearity[[1]]
# Specificity = Linearity[[2]]
# }else if(length(Linearity) == 1){
# Expression = Linearity
# Specificity = Linearity
# }else if(length(Linearity) > 2){stop("Linearity needs to be either 'Auto', or a single transfomation (to be applied to both Expression and Specificitydata) or a vector of two values, with the first applied to Expression data and the second to Specificity data")}
#for(ES in c("Expression", "Specificity")){
#if(ES == "Expression"){
# ESCol = "Total"
# Transformation = Expression
# bc = boxcox(Total~1, data= SumStats2, plotit = F)}
# else if(ES == "Specificity"){
# ESCol = "SD"
# Transformation = Specificity
# bc = boxcox(SD~1, data= SumStats2, plotit = F)}
#lambda <- round_any(bc$x[which.max(bc$y)], 0.5)
#if(Transformation == "Auto"){
# if(lambda == 0){
# SumStats[[ESCol]] = log(SumStats[[ESCol]]+1)
# Settings[[paste(ES, "Transformation", sep="_")]] = "Auto: Log"
# if(Verbose == T){cat(paste("\n |", ES, "autocorrected by log transformation"))}
# }else if(lambda == 0.5 | lambda == -0.5){
# SumStats[[ESCol]] = sqrt(SumStats[[ESCol]])
# Settings[[paste(ES, "Transformation", sep="_")]] = "Auto: Square root"
# if(Verbose == T){cat(paste("\n |", ES, "autocorrected by square root transformation"))}
# #}else if(lambda == -0.5){
# #SumStats[[ESCol]] = 1/sqrt(SumStats[[ESCol]])
# #Settings[[paste(ES, "Transformation", sep="_")]] = "Auto: Inverse square root"
# # if(Verbose == T){cat(paste("\n |", ES, "autocorrected by inverse square root transformation"))}
# #}else if(lambda == -1){
# #SumStats[[ESCol]] = 1/SumStats[[ESCol]]
# #Settings[[paste(ES, "Transformation", sep="_")]] = "Auto: Inversion"
# #if(Verbose == T){cat(paste("\n |", ES, "autocorrected by inversion"))}
# }else if(lambda == 1 | lambda == -1){
# Settings[[paste(ES, "Transformation", sep="_")]] = "Auto: No autocorrection"
# if(Verbose == T){cat(paste("\n | No autocorrection found for", ES, "parameter"))}
# }else if(lambda > 1 | lambda < 1){
# SumStats[[ESCol]] = SumStats[[ESCol]]^lambda
# Settings[[paste(ES, "Transformation", sep="_")]] = paste("Auto: x^", lambda)
# if(Verbose == T){cat(paste("\n |", ES, "autocorrected by raising to the power of", lambda))}
# #}else if(lambda < 1){
# #SumStats[[ESCol]] = 1/SumStats[[ESCol]]^lambda
# #Settings[[paste(ES, "Transformation", sep="_")]] = paste("Auto: 1/x^", lambda)
# #if(Verbose == T){cat(paste("\n |", ES, "autocorrected by raising to the power of", lambda, "then inversion"))}
# }
# }else if(length(grep("\\^", Transformation, value = T)) > 0){ #For Exponent
# Transformation = as.numeric(gsub("\\^", "", Transformation))
# SumStats[[ESCol]] = SumStats[[ESCol]] ^Transformation
# Settings[[paste(ES, "Transformation", sep="_")]] = paste("x^", Transformation)
# }else if(length(grep("og", Transformation, value = T)) > 0){ #For Log
# Transformation = as.numeric(gsub("Log", "",gsub("log", "", Transformation)))
# if(Transformation == ""){
# SumStats[[ESCol]] = log(SumStats[[ESCol]]+1)
# Settings[[paste(ES, "Transformation", sep="_")]] = "Log"
# }else{
# SumStats[[ESCol]] = log(SumStats[[ESCol]]+1, Transformation)
# Settings[[paste(ES, "Transformation", sep="_")]] = paste("Log base", Transformation)
# }}else if(is.numeric(Transformation) == T & Transformation > 0 & ! Transformation %in% c(0,1) | is.numeric(Transformation) == T & Transformation < 0){
# Expression = as.numeric(Transformation)
# SumStats[[ESCol]] = SumStats[[ESCol]] * Transformation
# Settings[[paste(ES, "Transformation", sep="_")]] = paste("Multiplied by", Transformation)
# }else if(is.numeric(Transformation) == T & Transformation %in% c(0,1) | is.na(Transformation) == T){
# Settings[[paste(ES, "Transformation", sep="_")]] = paste("No correction for", ES)
# }
#}
SumStats[SumStats==Inf] <- 0
SumStats_KeyGenes = subset(SumStats, row.names(SumStats) %in% GetGenes)
SumStats_SecondaryGenes = subset(SumStats, row.names(SumStats) %in% SecGenes)
SumStats_KeyGenes$Total = (SumStats_KeyGenes$Total- min(SumStats_KeyGenes$Total))/(max(SumStats_KeyGenes$Total) - min(SumStats_KeyGenes$Total))
SumStats_KeyGenes$SD = (SumStats_KeyGenes$SD- min(SumStats_KeyGenes$SD))/(max(SumStats_KeyGenes$SD) - min(SumStats_KeyGenes$SD))
SumStats_KeyGenes$RankScore = SumStats_KeyGenes$Total + SumStats_KeyGenes$SD
SumStats_KeyGenes = SumStats_KeyGenes[order(-SumStats_KeyGenes$RankScore),]
SpecCut = subset(Pull5, Pull5[[1]] >= 1)
SumStats3 = subset(SumStats_KeyGenes, row.names(SumStats_KeyGenes) %in% row.names(SpecCut))
OutputList[["SumStats"]] = SumStats3
Settings[["InputGenes"]] = GeneList
Settings[["MappedGenes"]] = row.names(SumStats_KeyGenes)
Settings[["GenesNotFound"]] = subset(GeneList, ! GeneList %in% row.names(SumStats_KeyGenes))
OutputList[["SumStats"]] = SumStats_KeyGenes
colnames(Pull5) = "Specificity"
Pull5[is.na(Pull5)] = 0
OutputList[["SpecificityScore"]] = Pull5
OutputList[["Settings"]] = Settings
setClass("GlowwormObj", representation(MetaData = "data.frame", GlowwormScaleOutput = "data.frame", GlowwormScaleOutput_Secondary = "data.frame",GlowwormScaleAllCells = "data.frame", GlowwormScaleAllCells_Secondary = "data.frame", SumStats = "data.frame", SpecificityScore = "data.frame", Settings = "list"))
Output = new("GlowwormObj",
MetaData = OutputList[["MetaData"]],
GlowwormScaleOutput = OutputList[["GlowwormScaleOutput_Target"]],
GlowwormScaleOutput_Secondary = OutputList[["GlowwormScaleOutput_Secondary"]],
GlowwormScaleAllCells = OutputList[["GlowwormScaleAllCells_Target"]],
GlowwormScaleAllCells_Secondary = OutputList[["GlowwormScaleAllCells_Secondary"]],
SumStats = OutputList[["SumStats"]],
SpecificityScore = OutputList[["SpecificityScore"]],
Settings = OutputList[["Settings"]])
# OutputList[["GlowwormScaleAllCells_Secondary"]]
if(Verbose == T){cat(paste("\n| ########## | GlowwormScale complete\nFrom", length(GeneList), " input genes, ", length(GetGenes), " genes were processed.", length(GenesNotIn), " genes were not in input data.\n", length(SecondaryGenes), " ", BackgroundType, "background genes were used"), sep="")}
return(Output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.